How to do it?:

Submission: Submit the GitHub link to your assignment on Canvas.


  1. Write the following function. Give examples to test your function.

Hint: Similar function

library(tidyverse)
# Make the folder that holds the data file the working directory.
# NOTE(review): this is a hard-coded, machine-specific path and it changes
# the working directory for the rest of the session; the script only runs
# as-is where this directory exists.
setwd("C:/Users/student/Downloads")
# Read adult_census_missing.csv (resolved against the working directory)
# into `df`, the data set used by every example below.
df <- read_csv('adult_census_missing.csv')


# Replace the missing values of a numeric vector with the mean of its
# observed values.
#
# Args:
#   x: Any vector. Non-numeric input is returned unchanged.
#
# Returns:
#   `x` with every missing entry (NA or NaN) replaced by the mean of the
#   non-missing entries. Integer input with missing entries comes back as
#   double, because the mean is generally fractional. A vector with nothing
#   missing, or with no observed values at all, is returned unchanged.
mean_impute <- function(x) {
  if (!is.numeric(x)) {
    return(x)
  }

  missing <- is.na(x)
  # Nothing to fill, or no observed values to average.
  if (!any(missing) || all(missing)) {
    return(x)
  }

  # Base assignment promotes integer vectors to double, so a fractional mean
  # can always be stored.
  x[missing] <- mean(x[!missing])
  x
}



# Apply mean_impute() to every column of a data frame.
#
# Args:
#   d: A data frame (or list of columns).
#
# Returns:
#   `d` with each column replaced by the result of mean_impute() on it.
numeric_impute <- function(d) {
  # seq_along() yields an empty sequence for zero-column input, whereas
  # 1:length(d) would iterate over 1 and 0.
  for (i in seq_along(d)) {
    d[[i]] <- mean_impute(d[[i]])
  }
  d
}

# Count the missing values in each column of the raw data.
colSums(is.na(df))
##            age      workclass         fnlwgt      education  education.num 
##             30             34              0             15              0 
## marital.status     occupation   relationship           race            sex 
##             26             35              0              0             24 
##   capital.gain   capital.loss hours.per.week native.country         income 
##              8              0              0             15              0
# Impute with numeric_impute() and count again: in the output below, age and
# capital.gain drop to 0 while the other columns keep their counts.
d1 <- numeric_impute(df)
colSums(is.na(d1))
##            age      workclass         fnlwgt      education  education.num 
##              0             34              0             15              0 
## marital.status     occupation   relationship           race            sex 
##             26             35              0              0             24 
##   capital.gain   capital.loss hours.per.week native.country         income 
##              0              0              0             15              0

  2. Write the following function. Give examples to test your function.

Hint: Use If-statement to combine the function in Problem 1 and the function in this example

# Fill the missing values of a vector: numeric vectors get the mean of the
# observed values, every other vector gets its most frequent observed value
# (the mode).
#
# Args:
#   x: An atomic vector or factor.
#
# Returns:
#   `x` with every missing entry replaced. Integer input with missing entries
#   comes back as double (the mean is generally fractional); non-numeric
#   input keeps its type, so factors stay factors and logicals stay logical.
#   A vector with nothing missing, or with no observed values at all, is
#   returned unchanged.
mode_impute <- function(x) {
  missing <- is.na(x)
  # Nothing to fill, or nothing observed to derive a fill value from.
  if (!any(missing) || all(missing)) {
    return(x)
  }

  observed <- x[!missing]
  if (is.numeric(x)) {
    fill <- mean(observed)
  } else {
    # Count each distinct value. Candidates are sorted and which.max() takes
    # the first maximum, so ties go to the first value in sorted order.
    candidates <- sort(unique(observed))
    counts <- tabulate(match(observed, candidates), nbins = length(candidates))
    # Indexing `candidates` keeps the fill value in the type of `x`.
    fill <- candidates[which.max(counts)]
  }

  x[missing] <- fill
  x
}


# Apply mode_impute() to every column of a data frame. Despite the name,
# this version handles non-numeric columns too, because mode_impute() does.
#
# Args:
#   d: A data frame (or list of columns).
#
# Returns:
#   `d` with each column replaced by the result of mode_impute() on it.
numeric_impute <- function(d) {
  # seq_along() yields an empty sequence for zero-column input, whereas
  # 1:length(d) would iterate over 1 and 0.
  for (i in seq_along(d)) {
    d[[i]] <- mode_impute(d[[i]])
  }
  d
}

# Count the missing values in each column of the raw data.
colSums(is.na(df))
##            age      workclass         fnlwgt      education  education.num 
##             30             34              0             15              0 
## marital.status     occupation   relationship           race            sex 
##             26             35              0              0             24 
##   capital.gain   capital.loss hours.per.week native.country         income 
##              8              0              0             15              0
# numeric_impute() now refers to the latest definition, which applies
# mode_impute() to each column; the output below shows no missing values
# left in any column.
d1 <- numeric_impute(df)
colSums(is.na(d1))
##            age      workclass         fnlwgt      education  education.num 
##              0              0              0              0              0 
## marital.status     occupation   relationship           race            sex 
##              0              0              0              0              0 
##   capital.gain   capital.loss hours.per.week native.country         income 
##              0              0              0              0              0

  3. Write the following function. Give examples to test your function.

Hint: Similar function

# Print one bar chart for every non-numeric column of a data frame.
#
# Args:
#   d: A data frame.
#
# Returns:
#   NULL, invisibly. Called for its side effect of printing plots.
density_plot <- function(d) {
  col_names <- names(d)
  # seq_along() is safe for zero-column input; 1:length(d) is not.
  for (i in seq_along(d)) {
    if (is.numeric(d[[i]])) {
      next
    }
    # NOTE(review): position = "fill" without a fill aesthetic rescales every
    # bar to height 1 -- confirm that this is the intended chart.
    # The .data pronoun refers to the column by name rather than capturing
    # the vector d[[i]] inside aes().
    p <- ggplot2::ggplot(d, ggplot2::aes(x = .data[[col_names[i]]])) +
      ggplot2::geom_bar(position = "fill") +
      ggplot2::labs(x = col_names[i])
    print(p)
  }
  invisible(NULL)
}

# Example: print a bar chart for each non-numeric column of the census data.
density_plot(df)


  4. Write the following function. Give examples to test your function.

Hint: Similar function

# Print a stacked bar chart for every pair of non-numeric columns.
#
# For each pair of columns (i, j) with i < j where both are non-numeric,
# column i goes on the x axis and column j supplies the fill colour.
#
# Args:
#   d: A data frame.
#
# Returns:
#   NULL, invisibly. Called for its side effect of printing plots.
density_plot2 <- function(d) {
  n_cols <- length(d)
  # With fewer than two columns there are no pairs; 1:(n_cols - 1) would
  # otherwise count downwards and index past the last column.
  if (n_cols < 2) {
    return(invisible(NULL))
  }

  col_names <- names(d)
  is_categorical <- !vapply(d, is.numeric, logical(1))

  for (i in seq_len(n_cols - 1)) {
    for (j in (i + 1):n_cols) {
      if (is_categorical[[i]] && is_categorical[[j]]) {
        p <- ggplot2::ggplot(
          d,
          ggplot2::aes(
            x = .data[[col_names[i]]],
            fill = .data[[col_names[j]]]
          )
        ) +
          ggplot2::geom_bar() +
          ggplot2::labs(x = col_names[i], fill = col_names[j])
        print(p)
      }
    }
  }
  invisible(NULL)
}

# Example: print a bar chart for each pair of non-numeric census columns.
density_plot2(df)


  5. Write the following function. Give examples to test your function.

Hint: Combine this function, this function, and the function in Question 4. One way to combine is creating a new function, quick_plot, and call these three functions within quick_plot.

# Print one plot for every pair of columns, chosen by the column types:
#   * both non-numeric   -> stacked bar chart (first column on x, second as
#                           fill colour)
#   * both numeric       -> scatter plot (first column on x, second on y)
#   * one of each        -> density curves of the numeric column, coloured
#                           by the non-numeric column
#
# Mixed pairs are plotted whichever of the two columns comes first in the
# data frame, so no pair is skipped because of column order.
#
# Args:
#   d: A data frame.
#
# Returns:
#   NULL, invisibly. Called for its side effect of printing plots.
density_plot3 <- function(d) {
  n_cols <- length(d)
  # With fewer than two columns there are no pairs; 1:(n_cols - 1) would
  # otherwise count downwards and index past the last column.
  if (n_cols < 2) {
    return(invisible(NULL))
  }

  col_names <- names(d)
  is_num <- vapply(d, is.numeric, logical(1))

  for (i in seq_len(n_cols - 1)) {
    for (j in (i + 1):n_cols) {
      first <- col_names[i]
      second <- col_names[j]

      if (!is_num[[i]] && !is_num[[j]]) {
        p <- ggplot2::ggplot(
          d,
          ggplot2::aes(x = .data[[first]], fill = .data[[second]])
        ) +
          ggplot2::geom_bar() +
          ggplot2::labs(x = first, fill = second)
      } else if (is_num[[i]] && is_num[[j]]) {
        # Default (identity) position: dodging a scatter plot has no defined
        # width and only produces "Width not defined" warnings.
        p <- ggplot2::ggplot(
          d,
          ggplot2::aes(x = .data[[first]], y = .data[[second]])
        ) +
          ggplot2::geom_point() +
          ggplot2::labs(x = first, y = second)
      } else {
        # Exactly one column is numeric: it goes on the x axis and the other
        # column defines the colour groups.
        num_name <- if (is_num[[i]]) first else second
        cat_name <- if (is_num[[i]]) second else first
        p <- ggplot2::ggplot(
          d,
          ggplot2::aes(x = .data[[num_name]], color = .data[[cat_name]])
        ) +
          ggplot2::geom_density() +
          ggplot2::labs(x = num_name, color = cat_name)
      }
      print(p)
    }
  }
  invisible(NULL)
}

# Example: run density_plot3() on the raw census data. The warnings recorded
# after this call report rows with missing values being removed and an
# undefined dodge width.
density_plot3(df)
## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 30 rows containing missing values (`geom_point()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Removed 30 rows containing missing values (`geom_point()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 38 rows containing missing values (`geom_point()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 30 rows containing missing values (`geom_point()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Removed 30 rows containing missing values (`geom_point()`).

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 30 rows containing non-finite values (`stat_density()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 8 rows containing missing values (`geom_point()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 8 rows containing missing values (`geom_point()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Warning: Removed 8 rows containing missing values (`geom_point()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`
## Removed 8 rows containing missing values (`geom_point()`).

## Warning: Removed 8 rows containing non-finite values (`stat_density()`).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing non-finite values (`stat_density()`).

## Warning: Width not defined
## ℹ Set with `position_dodge(width = ...)`

## Warning: Groups with fewer than two data points have been dropped.
## no non-missing arguments to max; returning -Inf

## Warning: Groups with fewer than two data points have been dropped.
## no non-missing arguments to max; returning -Inf